#------------------------------------------Housekeeping-----------------------------------------#
#
# This is the primary replication file for "How windfall income increases spending at poker
# machines" published in the Economic Record
#
# This code replicates all analyses with one exception:
# "Figure 1: Average real monthly expenditures per EGM" which is created by pokies2.R
#
# 
# Created 22 Oct. 2013 by Kyle Peyton
# Last update 12 February 2013 by Kyle Peyton  
# Questions, comments, complaints? contact: kpeyto1@gmail.com 
#-----------------------------------------------------------------------------------------------#


# clear workspace 
rm(list=ls())

#Packages needed to reproduce this analysis. 
require(xtable)
require(foreign)
require(scales)
require(pcse)
require(sandwich)
require(lmtest)
require(strucchange)
require(ggplot2)
require(ggthemes)
require(gridExtra)
require(car)



####---------------------------------------Data management----------------------------------#####
# Create the estimated monthly anomoly (EMA) dummies: 96 of these, one for each period          #
#                                                                                               #
#                                                                                               #
##---------------------------------------------------------------------------------------------##

#import pokies data; path name depends on your system.
pokies <- read.dta('~/Dropbox/Pokies/replication/pokies06.dta') #Mac 

## estimate two way fixed effects model with seasonally adjusted/unadjusted pokie data 
pokie.twfe <- glm(rmonexp_egm ~ rmonexp_egm_1 + rmonexp_egm_12 + as.factor(lga_short) + as.factor(period) +yrs_minus_2004, data = pokies)

shortpokie <- pokies[-which(is.na(pokies$detrend_lga_12)),] ## keep only 84 periods 

##Drew Dimmery's method for replicating Stata's clustered SEs
robust.se <- function(model, cluster){
    require(sandwich)
    require(lmtest)
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- model$rank
    dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
    uj <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
    rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
    rcse.se <- coeftest(model, rcse.cov)
    return(list(rcse.cov, rcse.se))
  }

robustcluster.pokie <- robust.se(pokie.twfe,shortpokie$lga_short)[[1]]
  

####---------------------- recover betas and SEs for omitted LGAs ----------------------- ##### 
#
# The method used here is explained in detail in: Haisken-DeNew, John P. and Christoph M. 
# Schmidt (1997) "Inter-Industry and Inter-Region Differentials: Mechanics and Interpretation," 
# The Review of Economics and Statistics, 79, 3, August, 516-521.
#
#---------------------------------------------------------------------------------------------#
  
## first take out the betas for the LGAs 
oldlgabeta <- c(0,pokie.twfe$coefficients[4:64]) ## tack on placeholder for LGA 1

I <-diag(62) 
W <-matrix(1/62,62,62)

A = I-W
B = oldlgabeta 
newlgabeta <- A%*%B 
  
## transform standard errors to recover time period 13 and LGA = Alpine 
voflga <- robustcluster.pokie[4:64,4:64]


## add row and column of zeros
test <- data.frame(voflga)
new <- rep(0,61)
test <- data.frame(new,test)
test <- rbind(rep(0,61),test)
vofb <- as.matrix(test)

## using matrix algebra as specified by John HD
I <-diag(nrow(vofb)) 
W <-matrix(1/62,nrow(vofb),nrow(vofb)) ## weight of .1 for each of the 10 firms

A = I-W 
B = vofb
C = t(A)

newvar <- A%*%B%*%C

##extracts SEs for all 62 LGAs! here the first element in the SE vector belongs to Alpine 
lgases <- sqrt(diag(newvar))

## new matrix of betas and coefficients
lgaeffects <- cbind(newlgabeta,lgases)


#### recover betas and SEs for missing period ##### 

## first take out the betas for the periods 
oldperiodbeta <- c(0,pokie.twfe$coefficients[65:147]) ## tack on placeholder for LGA 1

I <-diag(84) 
W <-matrix(1/84,84,84)

A = I-W
B = oldperiodbeta 
newperiodbeta <- A%*%B 


## transform standard errors to recover time period 13 and LGA = Alpine 
vofperiod <- robustcluster.pokie[65:147,65:147]


## add row and column of zeros
test <- data.frame(vofperiod)
new <- rep(0,83)
test <- data.frame(new,test)
test <- rbind(rep(0,83),test)
vofb <- as.matrix(test)

## using matrix algebra as specified by John HD
I <-diag(nrow(vofb)) 
W <-matrix(1/83,nrow(vofb),nrow(vofb)) ## weight of .1 for each of the 10 firms

A = I-W 
B = vofb
C = t(A)

newvar <- A%*%B%*%C

##extracts SEs for all 62 LGAs! here the first element in the SE vector belongs to Alpine 
periodses <- sqrt(diag(newvar))

## new matrix of betas and coefficients
periodeffects <- data.frame(newperiodbeta,periodses)

periodeffects$month <- unique(pokies$month)
periodeffects$year <- c(rep(2005,6), rep(2006,12), rep(2007,12), rep(2008,12), rep(2009,12), rep(2010,12), rep(2011,12), rep(2012,6))

#plot(periodeffects$year, periodeffects$newperiodbeta )

out <- rep(NA, 84*83)
pval <- rep(NA, 84*83)

i <- 1
for (k in 1:84) {
  for(z in 1:84) {
    if(k != z) {
    out[i] <- abs((periodeffects$newperiodbeta[k] - periodeffects$newperiodbeta[z])/(sqrt((periodses[k])^2 + (periodses[z])^2)))
    pval[i] <- 2*pnorm(-out[i])
    
    i <- 1+i
    }
  }
}

### flag 'significant' p-values s.t. Bonferroni correction 

periodeffects$anom <- rep(NA, nrow(periodeffects))
i <- 83
k <- 1
for (a in seq(1,6972,83)) {
  periodeffects$anom[k] <- length(which(pval[a:i] <= .05/(83*84)) )
  print(i)
  k <- k + 1
  i <- i + 83
  
}
                
## which periods are significant 95% of the time ?
which(periodeffects$anom > 83*.85)
which(periodeffects$anom > 83*.90)
which(periodeffects$anom > 83*.95)


anomhist <- ggplot(periodeffects, aes(x=anom)) + geom_histogram(binwidth=5, colour = 'black', fill = 'white') + xlab(NULL) + ylab(NULL) + 
  theme_bw() + 
  theme(legend.title=element_blank(),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        legend.position = c(.12,0.15),
        legend.text = element_text(color= 'black', size = 8),
        legend.key = element_rect(fill = 'white', size = 0.5, linetype='solid'),
        legend.key.size = unit(1, 'lines'),
        legend.background = element_rect(fill="transparent"),
        text = element_text(family = "serif", size = 12)) + 
  scale_y_continuous(name="Estimated Period Effects", breaks=seq(0,20,1)) + 
  scale_x_continuous(name="Significant Differences", breaks=seq(15,85,5)) 
print(anomhist)
ggsave(plot=anomhist, filename=paste('~/dropbox/pokies/anomhist',
                                           '.pdf', sep=''), scale=1.2,height=5,width=7.5)


#----------------------------------------------EMA Plot-----------------------------------------#
# Create the EMA plot (Figure 2 in paper)
#
#-----------------------------------------------------------------------------------------------#
ema.beta <- periodeffects$newperiodbeta
ema.robust <- periodeffects$periodses


month = c("Jul","Aug","Sep","Oct","Nov","Dec", "Jan","Feb","Mar","Apr","May","Jun")
month <- rep(month, 7)
ema.ts <- data.frame(ema.beta, month)
date <- as.yearmon(2005 + seq(6,89)/12)
ema.ts$date <- as.Date(date)

pd <- position_dodge(.2)

emaplot.robust <- ggplot(ema.ts,aes(date, ema.beta, label=date)) +  geom_hline(yintercept=0, colour = "grey", size = 0.25) +
  geom_point(position = pd, colour="black", size = 3, pch = 20, shape = 21, fill = 'white') +
  geom_point(data=subset(ema.ts, month == "Dec"), aes(date, ema.beta, label = date),
             colour = "black", fill = "white", size = 3, pch =20) +
  geom_point(data=ema.ts[42,], aes(date, ema.beta, label = date), colour="black", fill = "white", size = 4,
             pch = 18) +
  geom_point(data=subset(ema.ts, month == "Apr"), aes(date, ema.beta, label = date),
             colour = "black", fill = "white",size = 3, pch =20) +
  geom_point(data=ema.ts[46,], aes(date, ema.beta, label = date), colour="black", fill = "white", size = 4,
             pch = 18) +
  geom_point(data=subset(ema.ts, month == "Mar"), aes(date, ema.beta, label = date),
             colour = "black", fill = "white", size = 3, pch =20) +
  geom_point(data=ema.ts[45,], aes(date, ema.beta, label = date), colour="black", fill = "white", size = 4,
             pch = 18) +
  geom_point(data=subset(ema.ts, month == "May"), aes(date, ema.beta, label = date),
             colour = "black", fill = "white", size = 3, pch =20) +
  geom_point(data=ema.ts[47,], aes(date, ema.beta, label = date), colour="black", fill = "white", size = 3.5,
             pch = 18) +
  geom_point(data=ema.ts[83,], aes(date, ema.beta, label = date), colour="black", fill = "white", size = 3.5,
             pch = 15) +
  geom_point(data=ema.ts[84,], aes(date, ema.beta, label = date), colour="black", fill = "white", size = 3.5,
             pch = 15) +
  xlab(NULL) +
  ylab(NULL) +
  theme(axis.title.y = element_text(family="serif", face="bold", size=14)) +
  geom_text(data=ema.ts[42, ], label="Dec. '08", family="serif", face="bold", size=3,
            vjust=.35, hjust = 1.30, colour = "black") +
  geom_text(data=ema.ts[45, ], label="Mar. '09", family="serif", face="bold", size=3,
            vjust=-.60, hjust = -.15, colour = "black") +
  geom_text(data=ema.ts[46, ], label="Apr. '09", family="serif", face="bold", size=3,
            vjust=.35, hjust = 1.35, colour = "black") +
  geom_text(data=ema.ts[47, ], label="May '09", family="serif", face="bold", size=3,
            vjust=-1.95, hjust = 1.15, colour = "black") +
  geom_text(data=ema.ts[83, ], label="May '12", family="serif", face="bold", size=3,
            vjust=-2.05, hjust = 1.25, colour = "black") +
  geom_text(data=ema.ts[84, ], label="Jun. '12", family="serif", face="bold", size=3,
            vjust=-1.25, hjust = -0.15, colour = "black") +
  geom_errorbar(aes(ymin=ema.beta-(1.96*ema.robust), ymax=ema.beta+(1.96*ema.robust)), colour="black", width=.25,
                position=pd, height = 1) +
  scale_x_date(breaks = "1 year", labels=date_format("%Y")) +
  scale_y_continuous(name="Estimated Period Effect (AUD per EGM)",breaks=seq(-900,900, 100)) +
  theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        #panel.grid.y.major = element_blank(),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        text = element_text(family = "serif", size = 12)) 
print(emaplot.robust)

ggsave(plot=emaplot.robust, filename=paste('~/dropbox/pokies/bwemaplotrobust',
                                         '.jpg', sep=''), scale=1.2,height=6,width=10)


#---------------------------------StarGazing---------------------------------------------------#
#
# Create Table 4 from the paper: estimates of monthly anomalies from equation 4
#
#-----------------------------------------------------------------------------------------------#


stargaze = as.data.frame(cbind(ema.beta, ema.robust))

starlab = c("Jul","Aug","Sep","Oct","Nov","Dec", "Jan","Feb","Mar","Apr","May","Jun")
starlabs = rep(starlab, 8)
yr = c("`05", "`06", "`07", "`08", "`09", "`10", "`11")
yrs = c("`04","`04","`04","`04","`04","`04", rep(yr, each = 12), "`12","`12","`12","`12",
        "`12","`12")
starlabsyr = paste(starlabs, yrs, sep = '')

## knock off 12 observations to accomodate LDV models
starlabsyr <- starlabsyr[13:96]

## export table to latex format
rownames(stargaze) = starlabsyr
colnames(stargaze) = c('$EMA_k$', 'SE', 'ASE','PCSE')
print(xtable(stargaze), include.rownames = T, sanitize.colnames.function =identity)



